require(foreign)
require(MatchIt)
require(cem)
require(mvtnorm)
require(ggplot2)
library(Zelig)

setwd("~/Dropbox/Spring 2015/Gov 2001/Replication Paper/Analysis")

## rename dataset 
data <- read.csv("TDS-a_publicrelease1013.csv", header = )

## make new modified dataset with relevant and renamed variables to work with
## all missing data is preserved as we match all NAs in the original dataset
## with NAs in our dataset after creating each renamed variable

data.mod <- data.frame(rep(NA, nrow(data)))

## did you participate in the first day (Jan 25) of the Tahrir Square protests dummy
data.mod$first.day <- 0
data.mod$first.day[data$q601 == "2"] = 1

## remove column of NAs used to make modified dataset
data.mod[,1] <- NULL

## have you participated in protests previously dummy
data.mod$previously <- 0
data.mod$previously[data$q602 == "2"] = 1 
data.mod$previously[352] <- NA

## gender dummy ## male = 1 female = 0
data.mod$gender <- 0
data.mod$gender[124] <- NA
data.mod$gender[data$q102 == "1"] = 1

## do you have internet at your home dummy
data.mod$int.home <- 0 
data.mod$int.home[data$q105 == "1"] = 1
data.mod$int.home[c(164, 181)] <- NA

## do you have internet on your phone dummy
data.mod$int.phone <- 0
data.mod$int.phone[data$q106 == "1"] = 1

## rename education variable
data.mod$edu <- data$q104

## rename age variable
data.mod$age <- data$q101

## do you read or post to blogs dummy
data.mod$blogs <- 0
data.mod$blogs[data$q271 == "1"] = 1
data.mod$blogs[data$q271 == "2"] = 1
data.mod$blogs[168] <- NA

## do you use email dummy
data.mod$email <- 0
data.mod$email[data$q281 == "1"] = 1
data.mod$email[91] <- NA

## do you use facebook dummy
data.mod$facebook <- 0
data.mod$facebook[data$q261 == "1"] = 1

## do you make phone calls dummy
data.mod$phone <- 0
data.mod$phone[data$q211 == "1"] = 1
data.mod$phone[51] <- NA

## do you read news papers dummy
data.mod$print <- 0
data.mod$print[data$q241 == "1"] = 1

## do you watch satellite tv dummy
data.mod$tv <- 0
data.mod$tv[data$q221 == "1"] = 1
data.mod$tv[590] <- NA

## do you use text messaging dummy
data.mod$sms <- 0
data.mod$sms[data$q201 == "1"] = 1

## do you use twitter dummy
data.mod$twitter <- 0 
data.mod$twitter[data$q251 == "1"] = 1

## political activism dummy
data.mod$activism <- 0
data.mod$activism[data$q107 != 0] <- 1

## treatment of using facebook and twitter 
data.mod$treat <- 0
data.mod$treat[data.mod$facebook == 1 | data.mod$twitter == 1] <- 1

## treatment of using just facebook
data.mod$treat.face <- 0
data.mod$treat.face[data.mod$facebook == 1] <- 1

## treatment of using just twitter
data.mod$treat.twit <- 0
data.mod$treat.twit[data.mod$twitter == 1] <- 1

## eliminate missing observations so matching can be performed
## a total of 9 out of 1048 observations are deleted due to missingness  
complete.data.mod <- data.mod[complete.cases(data.mod),]

## check imbalance to have a baseline to compare it to when matching is used
## and see imbalance of original dataset
imbalance(complete.data.mod$treat, data = complete.data.mod, 
          drop = c("treat", "treat.twit", "treat.face", "first.day", 
                   "weights", "subclass", "distance", "twitter", "facebook"))

##remove <- c(as.numeric(rownames(complete.data.mod
##[complete.data.mod$facebook != 1 & complete.data.mod$twitter == 1,])))

##new.data <- complete.data.mod[-c(349, 612, 717, 790, 944, 955, 949),]

## define cutpoints for all variables when using CEM
cp.age <- c(0, 17.5, 24.5, 34.5, 44.5, 54.5, 64.5, 74.5)
##c(0, 17.5, 24.5, 34.5, 44.5, 54.5, 64.5, 74.5)
cp.edu <- c(-.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5)
cp.previously <- c(-.5, .5)
cp.gender <- c(-.5, .5)
cp.activism <- c(-.5, .5)
cp.int.home <- c(-.5, .5)
cp.int.phone <- c(-.5, .5)
cp.blogs <- c(-.5, .5)
cp.email <- c(-.5, .5)
##cp.twitter <- c(-.5, .5)
cp.phone <- c(-.5, .5)
cp.print <- c(-.5, .5)
cp.tv <- c(-.5, .5)
cp.sms <- c(-.5, .5)

## define list of all cutpoints to be used in matchit()
our.cp <- list(previously = cp.previously, 
               age = cp.age, gender = cp.gender, 
               edu = cp.edu, activism = cp.activism, int.home = cp.int.home,
               int.phone = cp.int.phone, blogs = cp.blogs, email = cp.email, 
               phone = cp.phone, print = cp.print, 
               tv = cp.tv, sms = cp.sms)


####################################################################
#############   Matching first day participation    ################
####################################################################


## match data for all covariates included in the first.day regression
## with treat defined as the treatment variable
first.day.match <- matchit(formula = treat ~ previously + age + gender + edu + 
                             activism + int.home + int.phone + blogs + email + phone +
                             print + tv + sms,  
                           method = "cem", cutpoints = our.cp,
                           data = complete.data.mod)

## get dataset from matched data
first.day.data <- match.data(first.day.match)

## check imbalabce of matched dataset to compare to original dataset
imbalance(first.day.data$treat, data = first.day.data, 
          drop = c("treat", "treat.twit", "treat.face", "first.day", 
                   "weights", "subclass", "distance", "twitter", "facebook"), 
          weights = first.day.data$weights)

## perform logit regression analysis on new matched dataset also including 
## the activism and previously variables
first.day.model<- glm(first.day ~ treat + previously + age + gender + edu + 
                        activism + int.home + int.phone + blogs + email + phone +
                        print + tv + sms, 
                      family = binomial(link="logit"), data = first.day.data, 
                      weights = first.day.data$weights)

## show results of regression
summary(first.day.model)


####################################################################
#############    Matching previous participation    ################
####################################################################


## match data for all covariates included in the previously regression
## with treat defined as the treatment variable
previously.match <- matchit(formula = treat ~ age + gender + edu + 
                              activism + int.home + int.phone + blogs + email + phone +
                              print + tv + sms,  
                            method = "cem", cutpoints = our.cp,
                            data = complete.data.mod)

## get dataset from matched data
previously.data <- match.data(previously.match)

## check imbalabce of matched dataset to compare to original dataset
imbalance(previously.data$treat, data = previously.data, 
          drop = c("treat", "treat.twit", "treat.face", "first.day", 
                   "weights", "subclass", "distance", "twitter", "facebook", 
                   "previously"), 
          weights = previously.data$weights)

## perform logit regression analysis on new matched dataset also including 
## the activism variable
previously.model<- glm(previously ~ treat + age + gender + edu + 
                         activism + int.home + int.phone + blogs + email + phone +
                         print + tv + sms, 
                       family = binomial(link="logit"), data = previously.data, 
                       weights = previously.data$weights)

## show results of regression
summary(previously.model)


#############################################################
### Simulation for Effect of Social Media Treatment for #####
###        First Day participation matched dataset      #####
#############################################################


## define means and vcov matrix for first day simulation
first.day.coefs <- first.day.model$coef
first.day.sigma <- vcov(first.day.model)

## define vector of interest for social media treatment for first day
## treat = 1 and all other covariates held at their median
first.interest.media <- apply(X = 
                                cbind(1,first.day.data[,c(17,2,7,3,6,16,4,5,8,9,11,12,13,14)]), 
                              MARGIN = 2, FUN = median)

## define vector of interest for no social media treatment for first day
## treat = 0 and all other covariates held at their median
first.interest.no.media <- first.interest.media
first.interest.no.media[2] <- 0

## simulate betas from multivariate distribution using means and sigma for first day
first.sim.betas <- rmvnorm(n = 100000, mean = first.day.coefs, sigma = first.day.sigma)

## find expected value for probability of participating in first day of 
## protests for a non social media user  
first.no.media <- first.interest.no.media %*% t(first.sim.betas)
first.no.media.prob <- inv.logit(first.no.media)

## find expected value for probability of participating in first day of
## protests for a social media user 
first.media <- first.interest.media %*% t(first.sim.betas)
first.media.prob <- inv.logit(first.media)

## expected difference in probability of participating in first day of protests
## for a social media and non social media user
round(mean(first.media.prob) - mean(first.no.media.prob),4)

## plot density of both expected probabilities
firstdata <- data.frame()
names <- c(rep("no social media", 100000), rep("social media", 100000))
values <- c(first.no.media.prob, first.media.prob)
firstdata <- data.frame(names, values)
firstplot <- ggplot(firstdata, aes(x = values, color = names)) + geom_density(aes(fill=names), alpha =.3) + ggtitle("Expected Values - After Matching") + labs(main ="Expected Probability", x="expected probability") + theme(legend.position="top")
ggsave(filename = "figure1.png", plot = firstplot, width = 4, height = 3, scale = 1.5)

#############################################################
### Simulation for Effect of Twitter and Facebook for   #####
###     First Day participation original dataset        #####
#############################################################


## table 3 model 2 regression for first day participation from paper
table3mod2 <- glm(first.day ~ age + gender + edu + int.home + int.phone + blogs
                  + email + facebook + phone + print + tv + sms + twitter, 
                  family = "binomial", data = data.mod)

## define means and vcov matrix for first day participation simulation for paper
paper.first.coefs <- table3mod2$coef
paper.first.sigma <- vcov(table3mod2)

## define vector of interest for social media treatment for first day
## facebook and twitter = 1 and all other covariates held at their median
paper.interest.media <- apply(X = 
                                cbind(1,na.omit(data.mod[,c(7,3,6,4,5,8,9,10,11,12,13,14,15)])), 
                              MARGIN = 2, FUN = median)
paper.interest.media[14] <- 1

## define vector of interest for no social media treatment for first day
## facebook and twitter = 0 and all other covariates held at their median
paper.interest.no.media <- paper.interest.media
paper.interest.no.media[c(9,14)] <- 0

## simulate betas from multivariate distribution using means and 
## sigma for first day regression
paper.first.betas <- rmvnorm(n = 100000, mean = paper.first.coefs, sigma = paper.first.sigma)

## find expected value for probability of participating in first day of 
## protests for a non social media user
paper.no.media <- paper.interest.no.media %*% t(paper.first.betas)
paper.no.media.prob <- inv.logit(paper.no.media)

## find expected value for probability of participating in first day of
## protests for a social media user
paper.media <- paper.interest.media %*% t(paper.first.betas)
paper.media.prob <- inv.logit(paper.media)

## expected difference in probability of participating in first day of protests
## for a social media and non social media user
round(mean(paper.media.prob) - mean(paper.no.media.prob),4)

## plot density of both expected probabilities
twfirstdata <- data.frame()
names <- c(rep("no social media", 100000), rep("social media", 100000))
values <- c(paper.no.media.prob, paper.media.prob)
twfirstdata <- data.frame(names, values)
twfirstplot <- ggplot(twfirstdata, aes(x = values, color = names)) + geom_density(aes(fill=names), alpha =.3) + ggtitle("Expected Values - Original Model") + labs(main ="Expected Probability", x="expected probability") + theme(legend.position="top")
ggsave(filename = "figure2.png", plot = twfirstplot, width = 4, height = 3, scale = 1.5)
names <- c()
values <- c()

#############################################################
### Simulation for Effect of Social Media Treatment for #####
###        Previous participation matched dataset       #####
#############################################################


## define means and vcov matrix for previously simulation
previously.coefs <- previously.model$coef
previously.sigma <- vcov(previously.model)

## define vector of interest for social media treatment for previous participation
## treat = 1 and all other covariates held at their median
previously.interest.media <- apply(X = 
                                     cbind(1,previously.data[,c(17,7,3,6,16,4,5,8,9,11,12,13,14)]), 
                                   MARGIN = 2, FUN = median)

## define vector of interest for no social media treatment for previous participation
## treat = 0 and all other covariates held at their median
previously.interest.no.media <- previously.interest.media
previously.interest.no.media[2] <- 0

## simulate betas from multivariate distribution using means and sigma for previous
## participation
previously.sim.betas <- rmvnorm(n = 100000, mean = previously.coefs, 
                                sigma = previously.sigma)

## find expected value for probability of participating in previous 
## protests for a non social media user 
prev.no.media <- previously.interest.no.media %*% t(previously.sim.betas)
prev.no.media.prob <- inv.logit(prev.no.media)

## find expected value for probability of participating in previous 
## protests for a social media user 
prev.media <- previously.interest.media %*% t(previously.sim.betas)
prev.media.prob <- inv.logit(prev.media)

## expected difference in probability of participating in previous protests
## for a social media and non social media user
round(mean(prev.media.prob) - mean(prev.no.media.prob),4)

## plot density of both expected probabilities
prevdata <- data.frame()
names <- c(rep("no social media", 100000), rep("social media", 100000))
values <- c(prev.no.media.prob, prev.media.prob)
prevdata <- data.frame(names, values)
prevplot <- ggplot(prevdata, aes(x = values, color = names)) + geom_density(aes(fill=names), alpha =.3) + ggtitle("Expected Values - After Matching") + labs(main ="Expected Probability", x="expected probability") + theme(legend.position="top")
ggsave(filename = "figure3.png", plot = prevplot, width = 4, height = 3, scale = 1.5)
names <- c()
values <- c()

#############################################################
### Simulation for Effect of Twitter and Facebook for   #####
###   Previous participation using original dataset     #####
#############################################################

## table 3 model 4 regression for previous participation from paper
table3mod4 <- glm(previously ~ age + gender + edu + int.home + int.phone + blogs
                  + email + facebook + phone + print + tv + sms + twitter, 
                  family = "binomial", data = data.mod)

## define means and vcov matrix for previous participation simulation for paper
paper.prev.coefs <- table3mod4$coef
paper.prev.sigma <- vcov(table3mod4)

## define vector of interest for facebook and twitter treatment for previous 
## participation with facebook and twitter = 1 and all other covariates 
##held at their median
paper.prev.interest.media <- apply(X = 
                                     cbind(1,na.omit(data.mod[,c(7,3,6,4,5,8,9,10,11,12,13,14,15)])), 
                                   MARGIN = 2, FUN = median)
paper.prev.interest.media[14] <- 1

## define vector of interest for facebook and twitter treatment for previous 
## participation with facebook and twitter = 0 and all other covariates 
##held at their median
paper.prev.interest.no.media <- paper.prev.interest.media
paper.prev.interest.no.media[c(9,14)] <- 0

## simulate betas from multivariate distribution using means and 
## sigma from previous participation regression
paper.prev.betas <- rmvnorm(n = 100000, mean = paper.prev.coefs, sigma = paper.prev.sigma)

## find expected value for probability of participating in previous 
## protests for a non social media user
paper.prev.no.media <- paper.prev.interest.no.media %*% t(paper.prev.betas)
paper.prev.no.media.prob <- inv.logit(paper.prev.no.media)

## find expected value for probability of participating in previous 
## protests for a social media user
paper.prev.media <- paper.prev.interest.media %*% t(paper.prev.betas)
paper.prev.media.prob <- inv.logit(paper.prev.media)

## expected difference in probability of participating in previous protests
## for a social media and non social media user
round(mean(paper.prev.media.prob) - mean(paper.prev.no.media.prob),4)

## plot density of both expected probabilities
twprevdata <- data.frame()
names <- c(rep("no social media", 100000), rep("social media", 100000))
values <- c(paper.prev.no.media.prob, paper.prev.media.prob)
twprevdata <- data.frame(names, values)
twprevplot <- ggplot(twprevdata, aes(x = values, color = names)) + geom_density(aes(fill=names), alpha =.3) + ggtitle("Expected Values - Original Model") + labs(main ="Expected Probability", x="expected probability") + theme(legend.position="top")
ggsave(filename = "figure4.png", plot = twprevplot, width = 4, height = 3, scale = 1.5)
names <- c()
values <- c()

######################################################
# Analysis of the Arab Democracy Barometer - Wave II #
######################################################

# Importing the full ADB dataset from STATA format
barometer <- read.dta("ADBII_Merged_Data_file_English_FINAL_0.dta")
# Subset to the observations collected in Egypt
egypt <- subset(barometer, subset = barometer$country == "5. Egypt")

# Coding variables that correspond to those available in the Tahrir Data Set

# Internet access
egypt$internet <- 0
egypt$internet[egypt$q409 == "1. daily or almost daily"] <- 1
egypt$internet[egypt$q409 == "2. at least once a week"] <- 1
egypt$internet[egypt$q409 == "3. at least once a month"] <- 1
egypt$internet[egypt$q409 == "4. a few times a year"] <- 1

# Facebook
egypt$facebook <- 0
egypt$facebook[egypt$te4113 == "1. yes"] <- 1

# Twitter
egypt$twitter <- 0
egypt$twitter[egypt$te4114 == "1. yes"] <- 1

# Activism in civil society organizations
egypt$activism <- 0
egypt$activism[egypt$q5011 == "1. yes"] <- 1
egypt$activism[egypt$q5012 == "1. yes"] <- 1
egypt$activism[egypt$q5013 == "1. yes"] <- 1
egypt$activism[egypt$q5014 == "1. yes"] <- 1
egypt$activism[egypt$q5015 == "1. yes"] <- 1
egypt$activism[egypt$q5016 == "1. yes"] <- 1
egypt$activism[egypt$q5017 == "1. yes"] <- 1

# Participation in the January 25, 2011 protests
egypt$first.day <- 0
egypt$first.day[egypt$eg8031 == "1. yes"] <- 1

# Additional days of protest 
egypt$jan28 <- 0
egypt$jan28[egypt$eg8032 == "1. yes"] <- 1

egypt$feb2 <- 0
egypt$feb2[egypt$eg8033 == "1. yes"] <- 1

egypt$feb4 <- 0
egypt$feb4[egypt$eg8034 == "1. yes"] <- 1

egypt$feb6 <- 0
egypt$feb6[egypt$eg8035 == "1. yes"] <- 1

egypt$feb11 <- 0
egypt$feb11[egypt$eg8036 == "1. yes"] <- 1

# Sum of all days of participation in protest for count of protest intensity
egypt$total <- egypt$first.day + egypt$jan28 + egypt$feb2 + egypt$feb4 + egypt$feb6 + egypt$feb11

# Age
egypt$age <- egypt$q1001

# Gender
egypt$gender <- 0
egypt$gender[egypt$q1002 == "1. male"] <- 1

# Education levels
egypt$education <- 0
egypt$education[egypt$q1003 == "1. illiterate/literate"] <- 1
egypt$education[egypt$q1003 == "2. elementary"] <- 2
egypt$education[egypt$q1003 == "3. preparatory/basic"] <- 3
egypt$education[egypt$q1003 == "4. secondary"] <- 4
egypt$education[egypt$q1003 == "5. mid-level diploma/professional or technical"] <- 5
egypt$education[egypt$q1003 == "6. ba"] <- 6
egypt$education[egypt$q1003 == "7. ma and above"] <- 7

# Access to television
egypt$tv <- 0
egypt$tv[egypt$q4061 == "1. daily"] <- 1
egypt$tv[egypt$q4061 == "2. a few times a week"] <- 1
egypt$tv[egypt$q4061 == "3. a few times a month"] <- 1

# Print media
egypt$print <- 0
egypt$print[egypt$q4062 == "1. daily"] <- 1
egypt$print[egypt$q4062 == "2. a few times a week"] <- 1
egypt$print[egypt$q4062 == "3. a few times a month"] <- 1

# Combined social media access variable, coding 1 for Facebook and/or Twitter
egypt$social <- 0
egypt$social[egypt$te4113 == "1. yes"] <- 1
egypt$social[egypt$te4114 == "1. yes"] <- 1

# Blogs indicator
egypt$blogs <- 0
egypt$blogs[egypt$te4115 == "1. yes"] <- 1

# Urban indicator
egypt$urban <- 0
egypt$urban[egypt$q13 == "1. urban"] <- 1

# Logistic regression of participation in the first day of protests on covariates
simple <- glm(first.day ~ age + gender + education + internet + print + tv + blogs
              + social + activism + urban, family = "binomial", data = egypt)
summary(simple)

# Generating median values of interest for simulation
social.vars <- c("total", "age", "gender", "education", "internet", "print", "tv", "blogs",
                 "social", "activism", "urban")
social.variables <- egypt[social.vars]

# Population medians
medians <- apply(social.variables, MARGIN = 2, FUN = median)
medians
medians[1] <- 1

# Medians plus activist in civil society
med.activist <- medians
med.activist[10] <- 1

# Medians plus social media dummy
med.social <- medians
med.social[9] <- 1

# Medians of the Tahrir Data Set
med.ts <- medians
med.ts
med.ts[2] <- 26
med.ts[4] <- 6
med.ts[5] <- 1
med.ts[6] <- 1
med.ts[9] <- 1
med.ts[10] <- 1
med.ts[11] <- 1

med.table <- matrix(NA, nrow = 10, ncol = 2)
med.table[,1] <- t(medians[2:11])
med.table[,2] <- t(med.ts[2:11])
colnames(med.table) <- c("ADB Wave II", "Tahrir Data Set")
row.names(med.table) <- names(medians[2:11])
med.table
write.csv(med.table, file = "median comparison.csv")

# Simulating coefficients from logit regression model
set.seed(02138)
logit.sim <- rmvnorm(n = 100000,
                     mean = simple$coefficients,
                     sigma = vcov(simple))
# Expected values at population median
logit.med <- c()
for(i in 1:100000){
  logit.med[i] <- 1/(1 + exp(-1*medians %*% logit.sim[i,]))
}
mean.med <- mean(logit.med)
hist(logit.med)

# Expected values at population median + activist
logit.activist <- c()
for(i in 1:100000){
  logit.activist[i] <- 1/(1 + exp(-1*med.activist %*% logit.sim[i,]))
}
mean.activist <- mean(logit.activist)
hist(logit.activist)

# Expected values at population median + social media
logit.social <- c()
for(i in 1:100000){
  logit.social[i] <- 1/(1 + exp(-1*med.social %*% logit.sim[i,]))
}
mean.social <- mean(logit.social)
hist(logit.social)

# Expected values at tahrir median
logit.ts <- c()
for(i in 1:100000){
  logit.ts[i] <- 1/(1 + exp(-1*med.ts %*% logit.sim[i,]))
}
mean.ts <- mean(logit.ts)
hist(logit.ts)

# Generating a density plot for comparison
names_ <- c(rep("medians", 100000), rep("civil society", 100000), rep("social media", 100000), rep("tahrir", 100000))
values1 <- c(logit.med, logit.activist, logit.social, logit.ts)
logitplot <- data.frame(names_, values1)
logitdensity <- ggplot(logitplot, aes(x = values1, color = names_, fill = names_)) + geom_density(alpha = .3) + labs(x = "Probability of attending a protest")
ggsave(filename = "logitdensity.png", plot = logitdensity, width = 5, height = 2, scale = 1.5)

# Generating a table of expected values
logit.table <- matrix(NA, nrow = 2, ncol = 4)
logit.table[1,1] <- round(mean.med, digits = 4)
logit.table[1,2] <- round(mean.social, digits = 4)
logit.table[1,3] <- round(mean.activist, digits = 4)
logit.table[1,4] <- round(mean.ts, digits = 4)
logit.table[2,1] <- "-"
logit.table[2,2] <- round(mean.social - mean.med, digits = 4)
logit.table[2,3] <- round(mean.activist - mean.med, digits = 4)
logit.table[2,4] <- round(mean.ts - mean.med, digits = 4)
row.names(logit.table) <- c("Probability of attending a protest", "Difference from median")
colnames(logit.table) <- c("Median respondent", "Median + Social Media", "Median + Activist", "Tahrir Square Median")
logit.table
write.csv(logit.table, file = "logit simulation values.csv")


# Subsetting the set of variables that are of interest for matching estimation
vars <- c("first.day", "age", "gender", "education", "internet", "print", "tv", "blogs",
          "social", "activism", "urban")

# Establishing cutpoints for all relevant variables
cp.age <- c(0, 17.5, 24.5, 34.5, 44.5, 54.5, 64.5, 74.5)
cp.edu <- c(-.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5)
cp.gender <- c(-.5, .5)
cp.activism <- c(-.5, .5)
cp.internet <- c(-.5, .5)
cp.blogs <- c(-.5, .5)
cp.email <- c(-.5, .5)
cp.print <- c(-.5, .5)
cp.tv <- c(-.5, .5)
cp.urban <- c(-.5, .5)

our.cp <- list(age = cp.age, gender = cp.gender,
               edu = cp.edu, activism = cp.activism, internet = cp.internet,
               blogs = cp.blogs, email = cp.email,
               print = cp.print, tv = cp.tv, urban = cp.urban)

# Conducting coarsened exact matching on all variables, with social media as the treatment
small.egypt <- egypt[vars]
social.matches <- cem(treatment = "social", data = small.egypt, eval.imbalance = T, 
                      cutpoints = our.cp, drop = c("first.day", "social"))
social.matches

social.satt <- att(social.matches, first.day ~ age + gender + education + internet + print + 
                     tv + blogs + social + activism + urban, data = small.egypt)
social.satt

# Summary view of the protest intensity variables
mean(egypt$total)
# .181
hist(egypt$total)

# Estimating a binomial count model of protest intensity
ll.binom <- function(par, N, x, y){
  if(!all(x[,1] == 1)){
    x <- as.matrix(cbind(1,x))
  }
  pi <- 1/(1 + exp(-1*x%*%par))
  out <- sum(y * log(pi) + (N - y)*log(1-pi))
  return(out)
}

socbinom.opt <- optim(par = rep(0,11),
                      fn = ll.binom,
                      N = 6,
                      x = social.variables[,2:11],
                      y = social.variables$total,
                      control = list(fnscale = -1),
                      hessian = T,
                      method = "BFGS"
)
# Parameters from binomial count model
socbinom.opt$par
socbin.vcv <- solve(-socbinom.opt$hessian)
socbin.se <- sqrt(diag(socbin.vcv))
socbin.se

# Table of coefficients and standard errors
socbin.table <- matrix(0, nrow = 11, ncol = 3)
socbin.table[,1] <- round(socbinom.opt$par, digits = 3)
socbin.table[,2] <- round(socbin.se, digits = 3)
row.names(socbin.table) <- c("constant", social.vars[2:11])
colnames(socbin.table) <- c("Coefficient", "Standard Error", "P-value")
# Generating a p-value for each coefficient
for(i in 1:11){
  socbin.table[i,3] <- round(pnorm(abs(socbin.table[i,1]), mean = 0, sd = socbin.table[i,2], lower.tail = FALSE), digits = 3)
}
socbin.table
write.csv(socbin.table, file = "binomial count output.csv")

# Simulating from binomial model for parameters of interest
set.seed(2015)
socbin.sim <- rmvnorm(n = 100000,
                      mean = socbinom.opt$par,
                      sigma = socbin.vcv)

# Expected values at the population median
socbin.med <- c()
for(i in 1:100000){
  sim.pi <- 1/(1 + exp(-1*medians %*% socbin.sim[i,]))
  socbin.med[i] <- mean(rbinom(1000, 6, sim.pi))
}
exp.med <- mean(socbin.med)
hist(socbin.med)
perc.med <- length(socbin.med[socbin.med >= 1])/100000

# Expected values at the population median + activist
socbin.activist <- c()
for(i in 1:100000){
  sim.pi <- 1/(1 + exp(-1*med.activist %*% socbin.sim[i,]))
  socbin.activist[i] <- mean(rbinom(1000, 6, sim.pi))
}
exp.activist <- mean(socbin.activist)
hist(socbin.activist)
perc.activist <- length(socbin.activist[socbin.activist >= 1])/100000

# Expected values at the population median + social
socbin.yes <- c()
for(i in 1:100000){
  sim.pi <- 1/(1 + exp(-1*med.social %*% socbin.sim[i,]))
  socbin.yes[i] <- mean(rbinom(1000, 6, sim.pi))
}
exp.social <- mean(socbin.yes)
hist(socbin.yes)
perc.social <- length(socbin.yes[socbin.yes >= 1])/100000

# Expected values at the Tahrir median
socbin.ts <- c()
for(i in 1:100000){
  sim.pi <- 1/(1 + exp(-1*med.ts %*% socbin.sim[i,]))
  socbin.ts[i] <- mean(rbinom(1000, 6, sim.pi))
}
exp.ts <- mean(socbin.ts)
hist(socbin.ts)
perc.ts <- length(socbin.ts[socbin.ts >= 1])/100000

# Compiling into a comparative density plot
socbinplot <- data.frame()
names <- c(rep("medians", 100000), rep("civil society", 100000), rep("social media", 100000), rep("tahrir", 100000))
values <- c(socbin.med, socbin.activist, socbin.yes, socbin.ts)
socbinplot <- data.frame(names, values)
binplot <- ggplot(socbinplot, aes(x = values, color = names, fill = names)) + geom_density(alpha = .3) + labs(x = "protests attended")
ggsave(filename = "binomdensity.png", plot = binplot, width = 5, height = 2, scale = 1.5)

# Compiling table of expected values
bin.table <- matrix(NA, nrow = 2, ncol = 4)
bin.table[1,1] <- round(exp.med, digits = 4)
bin.table[1,2] <- round(exp.social, digits = 4)
bin.table[1,3] <- round(exp.activist, digits = 4)
bin.table[1,4] <- round(exp.ts, digits = 4)
bin.table[2,1] <- round(perc.med, digits = 4)
bin.table[2,2] <- round(perc.social, digits = 4)
bin.table[2,3] <- round(perc.activist, digits = 4)
bin.table[2,4] <- round(perc.ts, digits = 4)
row.names(bin.table) <- c("Expected number of protests attended", "% Predicted to Attend >= 1 Protest")
colnames(bin.table) <- c("Median respondent", "Median + Social Media", "Median + Activist", "Tahrir Square Median")
bin.table
write.csv(bin.table, file = "binomial simulation values.csv")